Run t-SNE on CD4+ events which express one or more cytokines. Repeat for Non-CD4 events.
The question being asked is “What are the memory and activation profiles of Ag-specific T cells?”
Stratify by Cohort and Antigen (S1, S2, NCAP, VEMP)
Color by
- Degree of functionality
- CD45RA
- CCR7
- HLA-DR
- CD38
Boxplots?
Also:
- Given the elevated IL2 in DMSO Hosp, it might actually also be interesting to take a look at the DMSO signal.
- Take a look at the difference in non-bg-corrected proportions and bg-corrected-proportions in order to get an idea of what amount of the t-SNE data might be background noise.
- Note that the term Not-CD4 is a bit ambiguous. The “4+” gate contains “CD4+CD8-” events, and the “NOT4+” gate contains “CD4-CD8+/-” events.
library(openCyto)
library(CytoML) # 1.12.0
library(flowCore) # required for description()
library(flowWorkspace) # required for gh_pop_get_data()
library(here)
library(tidyverse)
library(Rtsne)
library(ggplot2)
library(scales)
library(patchwork)
library(hues)
library(RColorBrewer)
library(ggrepel)
source(here::here("scripts/20200604_Helper_Functions.R")) # for distributeEvents() and sampleGatingHierarchy()
date <- 20200617
save_output <- TRUE
rerun_tsne <- TRUE
gs <- load_gs(here::here("out/GatingSets/20200611_HAARVI_ICS_GatingSet_AllBatches"))
## loading R object...
## loading tree object...
## Done
dput(gh_get_pop_paths(gs))
## c("root", "/Time", "/Time/LD-3+", "/Time/LD-3+/1419-3+", "/Time/LD-3+/1419-3+/S",
## "/Time/LD-3+/1419-3+/S/Lymph", "/Time/LD-3+/1419-3+/S/Lymph/4+",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/CD38+",
## "/Time/LD-3+/1419-3+/S/Lymph/HLADR+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF")
Perform CD4 analysis all the way through first.
cd4_gates_for_tsne <- c(
"/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
"/Time/LD-3+/1419-3+/S/Lymph/4+/TNF",
"/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
"/Time/LD-3+/1419-3+/S/Lymph/CD38+", "/Time/LD-3+/1419-3+/S/Lymph/HLADR+")
cd4_cytokine_gates <- c("/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
"/Time/LD-3+/1419-3+/S/Lymph/4+/TNF")
Still need to add boolean gates.
gs_pop_add(gs, eval(substitute(flowWorkspace::booleanFilter(v),
list(v = as.symbol(paste(cd4_cytokine_gates, collapse="|"))))),
parent = "/Time/LD-3+/1419-3+/S/Lymph/4+", name = "CD4_CytokinePos")
## [1] 29
recompute(gs, "/Time/LD-3+/1419-3+/S/Lymph/4+")
## ..................................................................................................................................................................................................................................................................................................................................................................................................................done!
cd4_cytokine_pos_parentGate <- "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_CytokinePos"
pop_counts <- pData(gs) %>%
left_join(gs_pop_get_count_fast(gs, subpopulations = cd4_cytokine_pos_parentGate),
by = c("rowname" = "name")) %>%
dplyr::rename(CD4_CytokinePos = Count) %>%
dplyr::select(rowname, Batch, "SAMPLE ID", STIM, Cohort, CD4_CytokinePos)
37C, 116C (DMSO), and 07B Spike 2 were filtered out for COMPASS due to low cell counts.
37C also has ~2-5% events in Live CD3+ gate. 07B Spike 2 has low-ish 13% Live CD3+ gate membership.
Include them all here, but note that for some of them, the low event counts means they will be under-represented in t-SNE.
head(pop_counts)
## rowname Batch SAMPLE ID STIM Cohort CD4_CytokinePos
## 1 112405.fcs_332150 1 15545 DMSO Hospitalized 694
## 2 112407.fcs_341600 1 15545 SEB Hospitalized 38139
## 3 112409.fcs_398100 1 15545 VEMP Hospitalized 954
## 4 112411.fcs_348925 1 15545 Spike 1 Hospitalized 1022
## 5 112413.fcs_296775 1 15545 Spike 2 Hospitalized 1004
## 6 112415.fcs_378400 1 15545 NCAP Hospitalized 1359
sub_group_size_calculations <- pop_counts %>%
dplyr::filter(Cohort != "Healthy control" & !is.na(Cohort) & !(STIM %in% c("DMSO", "SEB"))) %>% # TRIMAS are NA
group_by(Batch, Cohort, STIM) %>%
dplyr::summarise(n_Patients = n(),
min_CD4_CytokinePos = min(CD4_CytokinePos),
total_CD4_CytokinePos = sum(CD4_CytokinePos),
n_x_min_CD4_CytokinePos = n_Patients * min_CD4_CytokinePos)
sub_group_size_calculations %>%
arrange(n_x_min_CD4_CytokinePos) %>%
head() %>%
knitr::kable()
| Batch | Cohort | STIM | n_Patients | min_CD4_CytokinePos | total_CD4_CytokinePos | n_x_min_CD4_CytokinePos |
|---|---|---|---|---|---|---|
| 2 | Non-hospitalized | Spike 1 | 13 | 13 | 4317 | 169 |
| 2 | Non-hospitalized | Spike 2 | 13 | 15 | 1763 | 195 |
| 2 | Non-hospitalized | NCAP | 13 | 30 | 3860 | 390 |
| 2 | Non-hospitalized | VEMP | 13 | 36 | 11120 | 468 |
| 2 | Hospitalized | Spike 2 | 7 | 120 | 1495 | 840 |
| 1 | Hospitalized | Spike 2 | 6 | 171 | 2968 | 1026 |
Above: Based on the n_x_min_CD4_CytokinePos column, if we maintained equal representation of the patients within each group (by batch, cohort, and stim) there would only be 169 events per group, leading to a total of only 169x3x4 = 2028 events.
The limiting group is: B2, Non-hospitalized, Spike 1
sub_group_size_calculations %>%
arrange(total_CD4_CytokinePos) %>%
head() %>%
knitr::kable()
| Batch | Cohort | STIM | n_Patients | min_CD4_CytokinePos | total_CD4_CytokinePos | n_x_min_CD4_CytokinePos |
|---|---|---|---|---|---|---|
| 2 | Hospitalized | Spike 2 | 7 | 120 | 1495 | 840 |
| 2 | Non-hospitalized | Spike 2 | 13 | 15 | 1763 | 195 |
| 1 | Hospitalized | Spike 2 | 6 | 171 | 2968 | 1026 |
| 2 | Hospitalized | NCAP | 7 | 312 | 3431 | 2184 |
| 3 | Hospitalized | Spike 2 | 7 | 239 | 3546 | 1673 |
| 1 | Non-hospitalized | Spike 2 | 13 | 130 | 3656 | 1690 |
Above: If we allow unequal patient representation, we can have up to 1495 events per group.
Let’s settle for 1000x*3x4 = 12000 events total in the t-SNE run. So, 1000 events per group.
cd4_cytokinepos_sampleSizes <- pop_counts %>%
dplyr::filter(Cohort != "Healthy control" & !is.na(Cohort) & !(STIM %in% c("DMSO", "SEB"))) %>%
group_by(Batch, Cohort, STIM) %>%
nest() %>%
ungroup() %>%
mutate(nsamp = map2(1000, data, function(totalEvents, df) {
distributeEvents(totalEvents, df$CD4_CytokinePos)
})) %>%
unnest(cols = c(data, nsamp))
Make sure all groups are the same size
cd4_cytokinepos_sampleSizes %>%
group_by(Batch, Cohort, STIM) %>%
summarise(nsamp_sum = sum(nsamp)) %>%
dplyr::pull(nsamp_sum) %>%
unique()
## [1] 1000
But keep in mind that there is lopsided patient representation even within groups:
cd4_cytokinepos_sampleSizes %>%
arrange(nsamp) %>%
head(n=10)
## # A tibble: 10 x 7
## Batch STIM Cohort rowname `SAMPLE ID` CD4_CytokinePos nsamp
## <dbl> <chr> <chr> <chr> <chr> <int> <dbl>
## 1 2 Spike 1 Non-hospitali… 113851.fcs_27… 37C 13 13
## 2 2 Spike 2 Non-hospitali… 113853.fcs_23… 37C 15 15
## 3 2 NCAP Non-hospitali… 113855.fcs_20… 37C 30 30
## 4 2 VEMP Non-hospitali… 113849.fcs_25… 37C 36 36
## 5 2 Spike 2 Non-hospitali… 113685.fcs_13… 80C 55 55
## 6 2 Spike 2 Non-hospitali… 113781.fcs_12… 76C 61 61
## 7 1 VEMP Non-hospitali… 112629.fcs_32… 162C 893 76
## 8 1 Spike 1 Non-hospitali… 112643.fcs_23… 169C 743 76
## 9 1 Spike 2 Non-hospitali… 112488.fcs_32… 141C 311 76
## 10 1 NCAP Non-hospitali… 112859.fcs_36… 51C 708 76
cd4_cytokinepos_sampleSizes_4plot <- cd4_cytokinepos_sampleSizes %>%
mutate(the_label = ifelse(nsamp < 50,
paste0("Batch ", Batch, ", ", `SAMPLE ID`, ", ", STIM), NA)) %>%
mutate(Cohort = factor(Cohort,
levels = c("Non-hospitalized", "Hospitalized"),
labels = c("Conv\nNon-Hosp", "Conv\nHosp")))
ggplot(cd4_cytokinepos_sampleSizes_4plot,
aes(factor(Cohort), nsamp, label = the_label)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.15, height = 0) +
theme_bw(base_size=20) +
labs(title="ICS CD4 t-SNE patient representation",
y="Events Sampled for t-SNE") +
geom_text() +
facet_grid(Batch ~ STIM) +
theme(axis.title.x = element_blank())
## Warning: Removed 232 rows containing missing values (geom_text).
Reproducibility is important
set.seed(date)
call_sampleGatingHierarchy_for_cd4 <- function(currentSampleName, currentSampleSize) {
# print(sprintf("Sampling data from %s", currentSampleName))
sampleGatingHierarchy(gs[[currentSampleName]], cd4_cytokine_pos_parentGate, currentSampleSize, cd4_gates_for_tsne)
}
cd4_cytokinepos_data4tsne <- map2_dfr(cd4_cytokinepos_sampleSizes$rowname, cd4_cytokinepos_sampleSizes$nsamp,
call_sampleGatingHierarchy_for_cd4)
knitr::kable(head(cd4_cytokinepos_data4tsne))
| name | EXPERIMENT NAME | $DATE | SAMPLE ID | PATIENT ID | STIM | WELL ID | PLATE NAME | filename | rowname | Sample ID | Cohort | Age | Sex | Race | Hispanic? | Days symptom onset to visit 1 | Pair ID | Batch | /Time/LD-3+/1419-3+/S/Lymph/4+/CD4_CytokinePos | /Time/LD-3+/1419-3+/S/Lymph/4+/107a | /Time/LD-3+/1419-3+/S/Lymph/4+/154 | /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG | /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 | /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 | /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 | /Time/LD-3+/1419-3+/S/Lymph/4+/TNF | /Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+ | /Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+ | /Time/LD-3+/1419-3+/S/Lymph/CD38+ | /Time/LD-3+/1419-3+/S/Lymph/HLADR+ | Time | FSC-A | FSC-H | SSC-A | SSC-H | CD8b BB700 | TNFa FITC | CD107a PE-Cy7 | CD154 PE-Cy5 | CD3 ECD | IL2 PE | CD4 APC-H7 | IL17a Ax700 | IL4/5/13 APC | CD14/CD19 BV785 | CCR7 BV711 | CD38 BV605 | L/D | IFNg V450 | CD45RA BUV737 | HLADR BUV395 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 23.94000 | 78295.2 | 63400 | 13757.31 | 13208 | 780.8013 | 182.53427 | 648.0172 | 1346.5171 | 2728.276 | 2440.7312 | 1567.578 | 474.6469 | 650.5800 | 928.9816 | 1786.755 | 2063.9739 | 844.7427 | 680.6394 | 2495.806 | 259.3407 |
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 65.03799 | 101052.6 | 86636 | 18849.42 | 18764 | 704.6745 | 344.05429 | 602.1383 | 1278.8933 | 3131.226 | 623.1863 | 1887.669 | 2623.1912 | 452.9386 | 1564.1038 | 3225.979 | 2081.0901 | 1191.9154 | 456.8972 | 2579.319 | 651.7107 |
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 21.97200 | 146510.5 | 123037 | 26374.05 | 25209 | 572.1346 | -13.31206 | 1489.6616 | 866.1031 | 2911.237 | 577.0376 | 1525.547 | 952.7986 | 694.1076 | 873.2791 | 2209.903 | 887.0472 | 1413.6069 | 705.7365 | 1063.861 | 854.5002 |
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 40.56900 | 119913.6 | 100572 | 20781.69 | 19036 | 592.0034 | 661.64246 | 2065.9065 | 900.2537 | 2759.695 | 778.2350 | 1830.089 | 1198.0159 | 535.0631 | 882.6482 | 2149.851 | 1145.0156 | 1672.9725 | 859.9519 | 1517.605 | 735.4972 |
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 60.07300 | 104987.9 | 91785 | 18149.07 | 17661 | 632.3518 | 574.70831 | 2038.7100 | 857.8076 | 2891.542 | 690.3101 | 1556.152 | 1083.3341 | 609.9719 | 429.5510 | 2406.151 | 1614.5096 | 1753.3729 | 678.1209 | 2451.481 | 500.9050 |
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 49.07500 | 104532.7 | 89304 | 18408.33 | 17692 | 544.7924 | 564.35364 | 1777.3187 | 902.0987 | 2861.205 | 782.8384 | 1633.887 | 1280.8867 | 420.9029 | 1127.9441 | 2312.682 | 1331.2260 | 1601.4375 | 847.6118 | 1514.634 | 571.2283 |
dim(cd4_cytokinepos_data4tsne)
## [1] 24000 52
Is there maybe one cytokine that is dominating the entire sample?
If one cytokine has very high background expression (and a generous gate), it could be gated positive in a lot of events.
The high number of events expressing this cytokine could lead to it dominating the data, so that most sampled events are positive for this noisy cytokine. It would drown out real signal from other cytokines.
cytokine_dominance <- cd4_cytokinepos_data4tsne %>%
group_by(Batch) %>%
summarise_at(cd4_cytokine_gates, sum) %>%
t() %>%
as.data.frame() %>%
set_names(c("B1", "B2", "B3")) %>%
rownames_to_column("Cytokine_Gate") %>%
dplyr::filter(Cytokine_Gate != "Batch")
knitr::kable(cytokine_dominance)
| Cytokine_Gate | B1 | B2 | B3 |
|---|---|---|---|
| /Time/LD-3+/1419-3+/S/Lymph/4+/107a | 1579 | 2177 | 1681 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/154 | 2249 | 3587 | 2298 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG | 1330 | 1398 | 931 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 | 2070 | 2419 | 1992 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 | 2356 | 265 | 1805 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 | 815 | 1649 | 1429 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/TNF | 1988 | 2942 | 2315 |
cytokine_dominance %>%
pivot_longer(cols = starts_with("B"), names_to = "Batch", values_to = "Events_in_Gate") %>%
mutate(Cytokine = sub(".*4\\+\\/(.*)", "\\1", Cytokine_Gate)) %>%
ggplot(aes(Cytokine, Events_in_Gate, fill = Cytokine)) +
theme_bw(base_size=18) +
geom_bar(stat="identity") +
facet_grid(. ~ Batch) +
labs(title = "CD4 t-SNE Run Cytokine Dominance by Batch")
The cytokine imbalance doesn’t look too bad.
# t-SNE can take a long time, so there is a rerun_tsne switch
if(rerun_tsne) {
print("Running t-SNE")
print(Sys.time())
cd4_cytokinepos_tsne_out <- cd4_cytokinepos_data4tsne %>%
# Run CD3, co-receptor, cytokine, memory, and activation markers through t-SNE
dplyr::select("CD3 ECD", "CD8b BB700", "CD4 APC-H7",
"TNFa FITC", "CD107a PE-Cy7",
"CD154 PE-Cy5", "IL2 PE", "IL17a Ax700",
"IL4/5/13 APC", "IFNg V450",
"CCR7 BV711", "CD45RA BUV737",
"CD38 BV605", "HLADR BUV395") %>%
Rtsne::Rtsne(check_duplicates = FALSE)
print(Sys.time())
cd4_cytokinepos_dat <- cbind(as.data.frame(cd4_cytokinepos_tsne_out$Y) %>%
dplyr::rename(x = V1, y = V2),
cd4_cytokinepos_data4tsne)
if(save_output) {
print("Loading saved t-SNE run")
saveRDS(cd4_cytokinepos_dat, here::here(sprintf("out/tSNE/%s_ICS_CD4_CytokinePos_tSNE.rds", date)))
}
} else {
# Assuming t-SNE results are already saved
cd4_cytokinepos_dat <- readRDS(here::here(sprintf("out/tSNE/%s_ICS_CD4_CytokinePos_tSNE.rds", date)))
}
## [1] "Running t-SNE"
## [1] "2020-06-19 15:53:30 PDT"
## [1] "2020-06-19 15:55:14 PDT"
## [1] "Loading saved t-SNE run"
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
here::here("data/Arial_afm/ArialMT-Bold.afm"),
here::here("data/Arial_afm/ArialMT-Italic.afm"),
here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)
boolColorScheme <- c("FALSE" = "#E2E2E2", "TRUE" = "#023FA5")
cd4_cytokinepos_dat <- cd4_cytokinepos_dat %>%
mutate(cytokine_degree = rowSums(dplyr::select(., all_of(cd4_cytokine_gates))))
cd4_cytokinepos_dat <- cd4_cytokinepos_dat %>%
mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")),
STIM = factor(STIM, levels = c("Spike 1", "Spike 2", "NCAP", "VEMP")))
stim_labs <- c("S1", "S2", "NCAP", "VEMP")
names(stim_labs) <- c("Spike 1", "Spike 2", "NCAP", "VEMP")
cohort_labs <- c("Conv\nNon-Hosp", "Conv\nHosp")
names(cohort_labs) <- c("Non-hospitalized", "Hospitalized")
base_tsne_plot <- function(currentColumn, pointSize = 0.2, colorScheme = NA) {
p <- ggplot(cd4_cytokinepos_dat, aes(x=x, y=y,
colour=if(currentColumn %in% c("Batch", "SAMPLE ID")) {
factor(!!as.name(currentColumn))
} else {
as.logical(!!as.name(currentColumn))
})) +
geom_point(shape=20, alpha=0.8, size=pointSize) +
facet_grid(Cohort ~ STIM, switch="y",
labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
theme_bw() +
theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
strip.text=element_text(face="bold", size=22),
panel.grid.major = element_blank(),
legend.title=element_text(face="bold", size=14),
strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")))
if(!anyNA(colorScheme)) {
p <- p + scale_color_manual(values = colorScheme)
}
if(currentColumn %in% c("Batch", "SAMPLE ID")) {
p <- p + labs(color = currentColumn) +
guides(colour = guide_legend(override.aes = list(size=10))) +
theme(legend.title = element_text(size=15),
legend.text = element_text(size=15),
legend.position = "bottom") +
scale_colour_manual(values=as.character(iwanthue(length(unique(cd4_cytokinepos_dat[,currentColumn])))))
} else {
p <- p + theme(legend.position = "none")
}
p
}
base_tsne_plot("Batch")
base_tsne_plot("SAMPLE ID")
Now visualize the cytokine, memory, and activation expression localization
for(cg in cd4_gates_for_tsne) {
print(base_tsne_plot(cg, colorScheme = boolColorScheme) +
labs(title = sprintf("CD4+Cytokine+ t-SNE\nColored by %s", sub(".*\\/([^(\\/)]+)", "\\1", cg))))
}
table(cd4_cytokinepos_dat$cytokine_degree)
##
## 1 2 3 4 5
## 16152 2594 3183 1969 102
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))
currentColumn <- "cytokine_degree"
pointSize <- 0.2
ggplot(cd4_cytokinepos_dat, aes(x=x, y=y, colour=!!as.name(currentColumn))) +
geom_point(shape=20, alpha=0.8, size=pointSize) +
facet_grid(Cohort ~ STIM, switch="y",
labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
labs(colour="Cytokine\nDegree",
title="CD4+Cytokine+ t-SNE\nColored by Cytokine Polyfunctionality") +
theme_bw() +
theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
strip.text=element_text(face="bold", size=22),
panel.grid.major = element_blank(),
legend.title=element_text(face="bold", size=15),
legend.text = element_text(size=13),
strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")),
legend.position = "bottom") +
sc
Follow-up:
We should plot the memory breakdown of the cytokine+ events in boxplots.
We could also focus in on either the 1) IL2+ or TNF+ or CD154+ events as a group or 2) the polyfunctional COMPASS subsets, and make boxplots about their memory status.
Depending on how informative we think the activation data are, we can do the same for CD38 and HLADR.
Perform Not-CD4 analysis.
notcd4_gates_for_tsne <- c(
"/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154",
"/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2",
"/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513",
"/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF",
"/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+",
"/Time/LD-3+/1419-3+/S/Lymph/CD38+", "/Time/LD-3+/1419-3+/S/Lymph/HLADR+")
notcd4_cytokine_gates <- c("/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154",
"/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2",
"/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513",
"/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF")
Still need to add boolean gates.
gs_pop_add(gs, eval(substitute(flowWorkspace::booleanFilter(v),
list(v = as.symbol(paste(notcd4_cytokine_gates, collapse="|"))))),
parent = "/Time/LD-3+/1419-3+/S/Lymph/NOT4+", name = "NotCD4_CytokinePos")
## [1] 30
recompute(gs) # , "/Time/LD-3+/1419-3+/S/Lymph/NOT4+") # For some reason there is an error if I try to recompute lower in the gating tree
## ..................................................................................................................................................................................................................................................................................................................................................................................................................done!
notcd4_cytokine_pos_parentGate <- "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/NotCD4_CytokinePos"
pop_counts <- pData(gs) %>%
left_join(gs_pop_get_count_fast(gs, subpopulations = notcd4_cytokine_pos_parentGate),
by = c("rowname" = "name")) %>%
dplyr::rename(NotCD4_CytokinePos = Count) %>%
dplyr::select(rowname, Batch, "SAMPLE ID", STIM, Cohort, NotCD4_CytokinePos)
37C, 116C (DMSO), and 07B Spike 2 were filtered out for COMPASS due to low cell counts.
37C also has ~2-5% events in Live CD3+ gate. 07B Spike 2 has low-ish 13% Live CD3+ gate membership.
Include them all here, but note that for some of them, the low event counts means they will be under-represented in t-SNE.
head(pop_counts)
## rowname Batch SAMPLE ID STIM Cohort NotCD4_CytokinePos
## 1 112405.fcs_332150 1 15545 DMSO Hospitalized 168
## 2 112407.fcs_341600 1 15545 SEB Hospitalized 7813
## 3 112409.fcs_398100 1 15545 VEMP Hospitalized 218
## 4 112411.fcs_348925 1 15545 Spike 1 Hospitalized 174
## 5 112413.fcs_296775 1 15545 Spike 2 Hospitalized 110
## 6 112415.fcs_378400 1 15545 NCAP Hospitalized 270
sub_group_size_calculations <- pop_counts %>%
dplyr::filter(Cohort != "Healthy control" & !is.na(Cohort) & !(STIM %in% c("DMSO", "SEB"))) %>% # TRIMAS are NA
group_by(Batch, Cohort, STIM) %>%
dplyr::summarise(n_Patients = n(),
min_NotCD4_CytokinePos = min(NotCD4_CytokinePos),
total_NotCD4_CytokinePos = sum(NotCD4_CytokinePos),
n_x_min_NotCD4_CytokinePos = n_Patients * min_NotCD4_CytokinePos)
sub_group_size_calculations %>%
arrange(n_x_min_NotCD4_CytokinePos) %>%
head() %>%
knitr::kable()
| Batch | Cohort | STIM | n_Patients | min_NotCD4_CytokinePos | total_NotCD4_CytokinePos | n_x_min_NotCD4_CytokinePos |
|---|---|---|---|---|---|---|
| 2 | Non-hospitalized | Spike 2 | 13 | 29 | 1129 | 377 |
| 2 | Hospitalized | Spike 2 | 7 | 68 | 1649 | 476 |
| 3 | Hospitalized | Spike 2 | 7 | 76 | 1581 | 532 |
| 1 | Hospitalized | Spike 2 | 6 | 93 | 694 | 558 |
| 1 | Non-hospitalized | Spike 2 | 13 | 43 | 2558 | 559 |
| 2 | Non-hospitalized | NCAP | 13 | 43 | 2011 | 559 |
Above: Based on the n_x_min_NotCD4_CytokinePos column, if we maintained equal representation of the patients within each group (by batch, cohort, and stim) there would be 273 events per group, leading to a total of only 377x3x4 = 4524 events.
The limiting group is: B2, Non-hospitalized, Spike 2
sub_group_size_calculations %>%
arrange(total_NotCD4_CytokinePos) %>%
head() %>%
knitr::kable()
| Batch | Cohort | STIM | n_Patients | min_NotCD4_CytokinePos | total_NotCD4_CytokinePos | n_x_min_NotCD4_CytokinePos |
|---|---|---|---|---|---|---|
| 1 | Hospitalized | Spike 2 | 6 | 93 | 694 | 558 |
| 1 | Hospitalized | Spike 1 | 6 | 166 | 1115 | 996 |
| 2 | Non-hospitalized | Spike 2 | 13 | 29 | 1129 | 377 |
| 1 | Hospitalized | NCAP | 6 | 169 | 1373 | 1014 |
| 3 | Hospitalized | Spike 2 | 7 | 76 | 1581 | 532 |
| 2 | Hospitalized | Spike 2 | 7 | 68 | 1649 | 476 |
Above: If we allow unequal patient representation, we can have up to 694 events per group.
Let’s settle for 694x3x4 = 8328 events total in the t-SNE run.
notcd4_cytokinepos_sampleSizes <- pop_counts %>%
dplyr::filter(Cohort != "Healthy control" & !is.na(Cohort) & !(STIM %in% c("DMSO", "SEB"))) %>%
group_by(Batch, Cohort, STIM) %>%
nest() %>%
ungroup() %>%
mutate(nsamp = map2(694, data, function(totalEvents, df) {
distributeEvents(totalEvents, df$NotCD4_CytokinePos)
})) %>%
unnest(cols = c(data, nsamp))
Make sure all groups are the same size
notcd4_cytokinepos_sampleSizes %>%
group_by(Batch, Cohort, STIM) %>%
summarise(nsamp_sum = sum(nsamp)) %>%
dplyr::pull(nsamp_sum) %>%
unique()
## [1] 694
But keep in mind that there is lopsided patient representation even within groups:
notcd4_cytokinepos_sampleSizes %>%
arrange(nsamp) %>%
head(n=10)
## # A tibble: 10 x 7
## Batch STIM Cohort rowname `SAMPLE ID` NotCD4_CytokineP… nsamp
## <dbl> <chr> <chr> <chr> <chr> <int> <dbl>
## 1 2 Spike 2 Non-hospital… 113721.fcs_2… 161C 29 29
## 2 2 Spike 2 Non-hospital… 113781.fcs_1… 76C 37 37
## 3 2 Spike 2 Non-hospital… 113853.fcs_2… 37C 41 41
## 4 1 Spike 2 Non-hospital… 112821.fcs_1… 113C 43 43
## 5 2 NCAP Non-hospital… 113723.fcs_2… 161C 43 43
## 6 2 Spike 1 Non-hospital… 113671.fcs_1… 59C 46 46
## 7 2 Spike 2 Non-hospital… 113709.fcs_1… 157C 46 46
## 8 1 NCAP Non-hospital… 112823.fcs_1… 113C 51 51
## 9 1 VEMP Non-hospital… 112459.fcs_3… 112C 732 53
## 10 1 VEMP Non-hospital… 112484.fcs_3… 141C 223 53
notcd4_cytokinepos_sampleSizes_4plot <- notcd4_cytokinepos_sampleSizes %>%
mutate(the_label = ifelse(nsamp < 25,
paste0("Batch ", Batch, ", ", `SAMPLE ID`, ", ", STIM), NA)) %>%
mutate(Cohort = factor(Cohort,
levels = c("Non-hospitalized", "Hospitalized"),
labels = c("Conv\nNon-Hosp", "Conv\nHosp")))
ggplot(notcd4_cytokinepos_sampleSizes_4plot,
aes(factor(Cohort), nsamp, label = the_label)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.15, height = 0) +
theme_bw(base_size=20) +
labs(title="ICS Not-CD4 t-SNE patient representation",
y="Events Sampled for t-SNE") +
geom_text_repel(size=4) +
facet_grid(Batch ~ STIM) +
theme(axis.title.x = element_blank())
## Warning: Removed 236 rows containing missing values (geom_text_repel).
Reproducibility is important
set.seed(date+1)
call_sampleGatingHierarchy_for_cd4 <- function(currentSampleName, currentSampleSize) {
# print(sprintf("Sampling data from %s", currentSampleName))
sampleGatingHierarchy(gs[[currentSampleName]], notcd4_cytokine_pos_parentGate, currentSampleSize, notcd4_gates_for_tsne)
}
notcd4_cytokinepos_data4tsne <- map2_dfr(notcd4_cytokinepos_sampleSizes$rowname, notcd4_cytokinepos_sampleSizes$nsamp,
call_sampleGatingHierarchy_for_cd4)
knitr::kable(head(notcd4_cytokinepos_data4tsne))
| name | EXPERIMENT NAME | $DATE | SAMPLE ID | PATIENT ID | STIM | WELL ID | PLATE NAME | filename | rowname | Sample ID | Cohort | Age | Sex | Race | Hispanic? | Days symptom onset to visit 1 | Pair ID | Batch | /Time/LD-3+/1419-3+/S/Lymph/NOT4+/NotCD4_CytokinePos | /Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a | /Time/LD-3+/1419-3+/S/Lymph/NOT4+/154 | /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG | /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2 | /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17 | /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513 | /Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF | /Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+ | /Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+ | /Time/LD-3+/1419-3+/S/Lymph/CD38+ | /Time/LD-3+/1419-3+/S/Lymph/HLADR+ | Time | FSC-A | FSC-H | SSC-A | SSC-H | CD8b BB700 | TNFa FITC | CD107a PE-Cy7 | CD154 PE-Cy5 | CD3 ECD | IL2 PE | CD4 APC-H7 | IL17a Ax700 | IL4/5/13 APC | CD14/CD19 BV785 | CCR7 BV711 | CD38 BV605 | L/D | IFNg V450 | CD45RA BUV737 | HLADR BUV395 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 61.897 | 96120.24 | 84269 | 19137.39 | 18121 | 2197.9023 | 435.94394 | 1625.3967 | 1154.0245 | 3006.588 | 522.9092 | 1063.75098 | 596.0203 | 352.0423 | 1104.1075 | 2426.293 | 687.5623 | 1693.623 | 582.8617 | 2949.740 | 172.7109 |
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 29.699 | 85531.16 | 71826 | 20106.57 | 19686 | 691.9189 | 42.50722 | 1985.0042 | 710.8685 | 2844.340 | 640.4456 | 1038.77844 | 1241.4800 | 617.5194 | 1017.9365 | 1424.282 | 2125.5798 | 1643.731 | 435.3643 | 1390.699 | 1594.4603 |
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 70.564 | 126265.64 | 108899 | 24777.60 | 24179 | 765.7809 | 438.18649 | 1795.7152 | 950.7180 | 2742.777 | 788.9135 | 918.41797 | 699.7520 | 330.1357 | 912.4352 | 2287.560 | 1213.9369 | 1583.611 | 1057.1897 | 1482.738 | 452.2636 |
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 16.969 | 90115.48 | 77125 | 18524.04 | 18037 | 1289.5042 | 314.98084 | 183.4656 | 956.0971 | 3007.340 | 707.0035 | 98.51392 | 2338.2048 | 586.7673 | 1409.6289 | 3021.614 | 1674.9799 | 1135.599 | 638.1335 | 2886.849 | 811.1515 |
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 69.022 | 103021.04 | 91384 | 15381.60 | 14636 | 2025.3123 | 814.11292 | 705.6724 | 741.4208 | 2966.372 | 946.2702 | 491.29556 | 1535.7384 | 657.6039 | 1546.0616 | 2823.939 | 512.0006 | 1143.431 | 1950.6909 | 2615.035 | 291.3027 |
| 112409.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | VEMP | A06 | P2 | 15545_A6_A06_006.fcs | 112409.fcs_398100 | NA | Hospitalized | 63 | N/A | N/A | N/A | 47 | 7 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 32.179 | 123111.64 | 107516 | 26370.57 | 26124 | 1169.5574 | 257.73718 | 763.4775 | 596.2213 | 2761.780 | 527.0810 | 711.43835 | 2161.8057 | 676.6716 | 1169.7057 | 2848.102 | 1305.6493 | 1309.418 | 1014.0414 | 1405.771 | 903.6163 |
dim(notcd4_cytokinepos_data4tsne)
## [1] 16656 52
Is there maybe one cytokine that is dominating the entire sample?
If one cytokine has very high background expression (and a generous gate), it could be gated positive in a lot of events.
The high number of events expressing this cytokine could lead to it dominating the data, so that most sampled events are positive for this noisy cytokine. It would drown out real signal from other cytokines.
cytokine_dominance <- notcd4_cytokinepos_data4tsne %>%
group_by(Batch) %>%
summarise_at(notcd4_cytokine_gates, sum) %>%
t() %>%
as.data.frame() %>%
set_names(c("B1", "B2", "B3")) %>%
rownames_to_column("Cytokine_Gate") %>%
dplyr::filter(Cytokine_Gate != "Batch")
knitr::kable(cytokine_dominance)
| Cytokine_Gate | B1 | B2 | B3 |
|---|---|---|---|
| /Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a | 1403 | 2189 | 1709 |
| /Time/LD-3+/1419-3+/S/Lymph/NOT4+/154 | 257 | 405 | 154 |
| /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG | 837 | 849 | 631 |
| /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2 | 571 | 692 | 505 |
| /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17 | 2491 | 778 | 1860 |
| /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513 | 793 | 1472 | 938 |
| /Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF | 483 | 733 | 547 |
cytokine_dominance %>%
pivot_longer(cols = starts_with("B"), names_to = "Batch", values_to = "Events_in_Gate") %>%
mutate(Cytokine = sub(".*4\\+\\/(.*)", "\\1", Cytokine_Gate)) %>%
ggplot(aes(Cytokine, Events_in_Gate, fill = Cytokine)) +
theme_bw(base_size=18) +
geom_bar(stat="identity") +
facet_grid(. ~ Batch) +
labs(title = "Not-CD4 t-SNE Run Cytokine Dominance by Batch")
IL17 seems to be dominating Batch 1. We know that there is high IL17 DMSO background, so we’re likely including cells which were not stimulated by an experimental peptide.
# t-SNE can take a long time, so there is a rerun_tsne switch
if(rerun_tsne) {
print("Running t-SNE")
print(Sys.time())
notcd4_cytokinepos_tsne_out <- notcd4_cytokinepos_data4tsne %>%
# Run CD3, co-receptor, cytokine, memory, and activation markers through t-SNE
dplyr::select("CD3 ECD", "CD8b BB700", "CD4 APC-H7",
"TNFa FITC", "CD107a PE-Cy7",
"CD154 PE-Cy5", "IL2 PE", "IL17a Ax700",
"IL4/5/13 APC", "IFNg V450",
"CCR7 BV711", "CD45RA BUV737",
"CD38 BV605", "HLADR BUV395") %>%
Rtsne::Rtsne(check_duplicates = FALSE)
print(Sys.time())
notcd4_cytokinepos_dat <- cbind(as.data.frame(notcd4_cytokinepos_tsne_out$Y) %>%
dplyr::rename(x = V1, y = V2),
notcd4_cytokinepos_data4tsne)
if(save_output) {
print("Loading saved t-SNE run")
saveRDS(notcd4_cytokinepos_dat, here::here(sprintf("out/tSNE/%s_ICS_NotCD4_CytokinePos_tSNE.rds", date)))
}
} else {
# Assuming t-SNE results are already saved
notcd4_cytokinepos_dat <- readRDS(here::here(sprintf("out/tSNE/%s_ICS_NotCD4_CytokinePos_tSNE.rds", date)))
}
## [1] "Running t-SNE"
## [1] "2020-06-19 15:57:02 PDT"
## [1] "2020-06-19 15:58:12 PDT"
## [1] "Loading saved t-SNE run"
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
here::here("data/Arial_afm/ArialMT-Bold.afm"),
here::here("data/Arial_afm/ArialMT-Italic.afm"),
here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)
boolColorScheme <- c("FALSE" = "#E2E2E2", "TRUE" = "#023FA5")
notcd4_cytokinepos_dat <- notcd4_cytokinepos_dat %>%
mutate(cytokine_degree = rowSums(dplyr::select(., all_of(notcd4_cytokine_gates))))
notcd4_cytokinepos_dat <- notcd4_cytokinepos_dat %>%
mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")),
STIM = factor(STIM, levels = c("Spike 1", "Spike 2", "NCAP", "VEMP")))
stim_labs <- c("S1", "S2", "NCAP", "VEMP")
names(stim_labs) <- c("Spike 1", "Spike 2", "NCAP", "VEMP")
cohort_labs <- c("Conv\nNon-Hosp", "Conv\nHosp")
names(cohort_labs) <- c("Non-hospitalized", "Hospitalized")
base_tsne_plot <- function(currentColumn, pointSize = 0.2, colorScheme = NA) {
p <- ggplot(notcd4_cytokinepos_dat, aes(x=x, y=y,
colour=if(currentColumn %in% c("Batch", "SAMPLE ID")) {
factor(!!as.name(currentColumn))
} else {
as.logical(!!as.name(currentColumn))
})) +
geom_point(shape=20, alpha=0.8, size=pointSize) +
facet_grid(Cohort ~ STIM, switch="y",
labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
theme_bw() +
theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
strip.text=element_text(face="bold", size=22),
panel.grid.major = element_blank(),
legend.title=element_text(face="bold", size=14),
strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")))
if(!anyNA(colorScheme)) {
p <- p + scale_color_manual(values = colorScheme)
}
if(currentColumn %in% c("Batch", "SAMPLE ID")) {
p <- p + labs(color = currentColumn) +
guides(colour = guide_legend(override.aes = list(size=10))) +
theme(legend.title = element_text(size=15),
legend.text = element_text(size=15),
legend.position = "bottom") +
scale_colour_manual(values=as.character(iwanthue(length(unique(notcd4_cytokinepos_dat[,currentColumn])))))
} else {
p <- p + theme(legend.position = "none")
}
p
}
base_tsne_plot("Batch")
base_tsne_plot("SAMPLE ID")
Now visualize the cytokine, memory, and activation expression localization
for(cg in notcd4_gates_for_tsne) {
print(base_tsne_plot(cg, colorScheme = boolColorScheme) +
labs(title = sprintf("NotCD4+Cytokine+ t-SNE\nColored by %s", sub(".*\\/([^(\\/)]+)", "\\1", cg))))
}
table(notcd4_cytokinepos_dat$cytokine_degree)
##
## 1 2 3 4 5 6
## 14270 1456 647 243 38 2
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))
currentColumn <- "cytokine_degree"
pointSize <- 0.2
ggplot(notcd4_cytokinepos_dat, aes(x=x, y=y, colour=!!as.name(currentColumn))) +
geom_point(shape=20, alpha=0.8, size=pointSize) +
facet_grid(Cohort ~ STIM, switch="y",
labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
labs(colour="Cytokine\nDegree",
title="NotCD4+Cytokine+ t-SNE\nColored by Cytokine Polyfunctionality") +
theme_bw() +
theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
strip.text=element_text(face="bold", size=22),
panel.grid.major = element_blank(),
legend.title=element_text(face="bold", size=15),
legend.text = element_text(size=13),
strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")),
legend.position = "bottom") +
sc
Since the Not-CD4 gate contains both CD4-high, CD4-low, CD8-high, and CD8-low events.
notcd4_cytokinepos_dat %>%
ggplot(aes(x=`CD4 APC-H7`, color=factor(Batch), fill=factor(Batch))) +
geom_histogram(alpha=0.2, position="identity") +
geom_vline(xintercept = -1000) +
geom_vline(xintercept = 2500)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
notcd4_cytokinepos_dat %>%
ggplot(aes(x=`CD8b BB700`, color=factor(Batch), fill=factor(Batch))) +
geom_histogram(alpha=0.2, position="identity") +
geom_vline(xintercept = -3000) +
geom_vline(xintercept = 7000)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
map2(.x = c("CD4 APC-H7", "CD8b BB700"),
.y = list(c(-1000, 2500), c(-2500, 6500)),
.f = function(currentColumn, currentLimits) {
pointSize <- 0.4
print(ggplot(notcd4_cytokinepos_dat, aes(x=x, y=y, colour=!!as.name(currentColumn))) +
geom_point(shape=20, alpha=0.8, size=pointSize) +
facet_grid(Cohort ~ STIM, switch="y",
labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
labs(colour=currentColumn,
title=sprintf("NotCD4+Cytokine+ t-SNE\nColored by %s mfi", currentColumn)) +
theme_bw() +
theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
strip.text=element_text(face="bold", size=22),
panel.grid.major = element_blank(),
legend.title=element_text(face="bold", size=15),
legend.text = element_text(size=6),
strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")),
legend.position = "bottom") +
# values lower or higher than the limits get converted to min(limits) and max(limits), respectively
scale_colour_gradient(low = "red", high = "green", oob=squish, limits=currentLimits))
})
## [[1]]
##
## [[2]]